{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "c261e5f4-17a8-40da-beb9-599f1717e0fe",
   "metadata": {},
   "source": [
    "### 1. 安装HuggingFace 并下载模型到本地"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "02785614-9268-41c8-85a5-d579490edbbf",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!pip install huggingface-hub -Uqq\n",
    "!pip install -U sagemaker"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9e6bd7ee-16a3-4f5a-8857-8bbba83eb9e7",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from huggingface_hub import snapshot_download\n",
    "from pathlib import Path\n",
    "\n",
    "local_model_path = Path(\"./LLM_chatglm2_model\")\n",
    "local_model_path.mkdir(exist_ok=True)\n",
    "model_name = \"THUDM/chatglm2-6b\"\n",
    "commit_hash = \"b259b27320263629b0afccef134c54028233673d\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "94e8abc5-a58e-40e2-b1e6-fbf48307c716",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "snapshot_download(repo_id=model_name, revision=commit_hash, cache_dir=local_model_path)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "8d666c79-b039-4258-ac3b-46b19e63c3b8",
   "metadata": {},
   "source": [
    "### 2. 把模型拷贝到S3为后续部署做准备"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e9431deb-6359-442d-847b-1563f8dd3854",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import sagemaker\n",
    "from sagemaker import image_uris\n",
    "import boto3\n",
    "import os\n",
    "import time\n",
    "import json\n",
    "\n",
    "role = sagemaker.get_execution_role()  # execution role for the endpoint\n",
    "sess = sagemaker.session.Session()  # sagemaker session for interacting with different AWS APIs\n",
    "bucket = sess.default_bucket()  # bucket to house artifacts\n",
    "\n",
    "region = sess._region_name\n",
    "account_id = sess.account_id()\n",
    "\n",
    "s3_client = boto3.client(\"s3\")\n",
    "sm_client = boto3.client(\"sagemaker\")\n",
    "smr_client = boto3.client(\"sagemaker-runtime\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "40dd8f16-ae7c-48bf-8e52-1a15425fa74d",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "s3_model_prefix = \"LLM-RAG/workshop/LLM_chatglm2_model\"  # folder where model checkpoint will go\n",
    "model_snapshot_path = list(local_model_path.glob(\"**/snapshots/*\"))[0]\n",
    "s3_code_prefix = \"LLM-RAG/workshop/LLM_chatglm2_deploy_code\"\n",
    "print(f\"s3_code_prefix: {s3_code_prefix}\")\n",
    "print(f\"model_snapshot_path: {model_snapshot_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "067292c9-c066-4649-a61f-b460a24da584",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!aws s3 cp --recursive {model_snapshot_path} s3://{bucket}/{s3_model_prefix}"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "696b70c3-90f1-4175-95bf-568bafbcd383",
   "metadata": {},
   "source": [
    "### 3. 模型部署准备（entrypoint脚本，容器镜像，服务配置）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f7c4277-4480-42c6-aee6-1fbcca94eb82",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "inference_image_uri = (\n",
    "    f\"763104351884.dkr.ecr.{region}.amazonaws.com/djl-inference:0.23.0-deepspeed0.9.5-cu118\"\n",
    ")\n",
    "\n",
    "#中国区需要替换为下面的image_uri\n",
    "# inference_image_uri = (\n",
    "#     f\"727897471807.dkr.ecr.{region}.amazonaws.com.cn/djl-inference:0.21.0-deepspeed0.8.3-cu117\"\n",
    "# )\n",
    "\n",
    "print(f\"Image going to be used is ---- > {inference_image_uri}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d771bdb-11d2-45d2-9bef-face29221838",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!mkdir -p LLM_chatglm2_deploy_code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e5348ecb-43df-4094-97d8-a6723004862a",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%writefile LLM_chatglm2_deploy_code/model.py\n",
    "from djl_python import Input, Output\n",
    "import torch\n",
    "import logging\n",
    "import math\n",
    "import os\n",
    "\n",
    "from transformers import pipeline, AutoModel, AutoTokenizer\n",
    "\n",
    "def load_model(properties):\n",
    "    tensor_parallel = properties[\"tensor_parallel_degree\"]\n",
    "    model_location = properties['model_dir']\n",
    "    if \"model_id\" in properties:\n",
    "        model_location = properties['model_id']\n",
    "    logging.info(f\"Loading model in {model_location}\")\n",
    "    \n",
    "    tokenizer = AutoTokenizer.from_pretrained(model_location, trust_remote_code=True)\n",
    "   \n",
    "    model = AutoModel.from_pretrained(model_location, trust_remote_code=True).half().cuda()\n",
    "    \n",
    "    # model.requires_grad_(False)\n",
    "    # model.eval()\n",
    "    \n",
    "    return model, tokenizer\n",
    "\n",
    "\n",
    "model = None\n",
    "tokenizer = None\n",
    "generator = None\n",
    "\n",
    "\n",
    "def handle(inputs: Input):\n",
    "    global model, tokenizer\n",
    "    if not model:\n",
    "        model, tokenizer = load_model(inputs.get_properties())\n",
    "\n",
    "    if inputs.is_empty():\n",
    "        return None\n",
    "    data = inputs.get_as_json()\n",
    "    \n",
    "    input_sentences = data[\"inputs\"]\n",
    "    params = data[\"parameters\"]\n",
    "    history = data[\"history\"]\n",
    "    \n",
    "    # chat(tokenizer, query: str, history: List[Tuple[str, str]] = None, \n",
    "    # max_length: int = 2048, num_beams=1, do_sample=True, top_p=0.7, \n",
    "    # temperature=0.95, logits_processor=None, **kwargs)\n",
    "    response, history = model.chat(tokenizer, input_sentences, history=history, **params)\n",
    "    \n",
    "    result = {\"outputs\": response, \"history\" : history}\n",
    "    return Output().add_as_json(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "68348a83-627b-4924-a216-c2946441659b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "print(f\"option.s3url ==> s3://{bucket}/{s3_model_prefix}/\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "a06d1e60-3914-4059-a08f-05ac26761165",
   "metadata": {},
   "source": [
    "#### Note: option.s3url 需要按照自己的账号进行修改, 可以拷贝上一个cell的输出"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8996fe44-8e70-468b-abc1-38187cb33f4f",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%writefile LLM_chatglm2_deploy_code/serving.properties\n",
    "engine=Python\n",
    "option.tensor_parallel_degree=1\n",
    "option.s3url = s3://sagemaker-us-west-2-106839800180/LLM-RAG/workshop/LLM_chatglm2_model/"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "feef22a2-27b9-4018-a46b-6a99b532512f",
   "metadata": {},
   "source": [
    "#### 注意: 必须把transformers升级到4.27.1以上，否则会出现 [Issue344](https://github.com/THUDM/ChatGLM-6B/issues/344)\n",
    "\n",
    "如果是中国区建议添加国内的pip镜像,如下代码所示\n",
    "```\n",
    "%%writefile LLM_chatglm_deploy_code/requirements.txt\n",
    "-i https://pypi.tuna.tsinghua.edu.cn/simple\n",
    "transformers==4.28.1\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b7e76c6-6dbc-47fc-9f47-4765c526ab76",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%writefile LLM_chatglm2_deploy_code/requirements.txt\n",
    "transformers==4.28.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0ae6734a-aacd-410d-818d-0a962697c3c4",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!rm model.tar.gz\n",
    "!cd LLM_chatglm2_deploy_code && rm -rf \".ipynb_checkpoints\"\n",
    "!tar czvf model.tar.gz LLM_chatglm2_deploy_code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f77dc76-6d8c-4665-ba88-f03e887c136c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "s3_code_artifact = sess.upload_data(\"model.tar.gz\", bucket, s3_code_prefix)\n",
    "print(f\"S3 Code or Model tar ball uploaded to --- > {s3_code_artifact}\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "a5853daa-b8a3-4485-8c0a-64bf83e93a18",
   "metadata": {},
   "source": [
    "### 4. 创建模型 & 创建endpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ef974ca1-9638-45a8-9145-ea9d03b2b072",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from sagemaker.utils import name_from_base\n",
    "import boto3\n",
    "\n",
    "model_name = name_from_base(f\"chatglm2\") #Note: Need to specify model_name\n",
    "print(model_name)\n",
    "print(f\"Image going to be used is ---- > {inference_image_uri}\")\n",
    "\n",
    "create_model_response = sm_client.create_model(\n",
    "    ModelName=model_name,\n",
    "    ExecutionRoleArn=role,\n",
    "    PrimaryContainer={\n",
    "        \"Image\": inference_image_uri,\n",
    "        \"ModelDataUrl\": s3_code_artifact\n",
    "    },\n",
    "    \n",
    ")\n",
    "model_arn = create_model_response[\"ModelArn\"]\n",
    "\n",
    "print(f\"Created Model: {model_arn}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "233bb3a4-d737-41ad-8fcc-7082c6278e8c",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "endpoint_config_name = f\"{model_name}-config\"\n",
    "endpoint_name = f\"{model_name}-endpoint\"\n",
    "\n",
    "#Note: ml.g4dn.2xlarge 也可以选择\n",
    "endpoint_config_response = sm_client.create_endpoint_config(\n",
    "    EndpointConfigName=endpoint_config_name,\n",
    "    ProductionVariants=[\n",
    "        {\n",
    "            \"VariantName\": \"variant1\",\n",
    "            \"ModelName\": model_name,\n",
    "            \"InstanceType\": \"ml.g5.2xlarge\",\n",
    "            \"InitialInstanceCount\": 1,\n",
    "            # \"VolumeSizeInGB\" : 400,\n",
    "            # \"ModelDataDownloadTimeoutInSeconds\": 2400,\n",
    "            \"ContainerStartupHealthCheckTimeoutInSeconds\": 15*60,\n",
    "        },\n",
    "    ],\n",
    ")\n",
    "endpoint_config_response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "734a39b0-473e-4421-94c8-74d2b4105038",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "create_endpoint_response = sm_client.create_endpoint(\n",
    "    EndpointName=f\"{endpoint_name}\", EndpointConfigName=endpoint_config_name\n",
    ")\n",
    "print(f\"Created Endpoint: {create_endpoint_response['EndpointArn']}\")"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "1262e826-a810-401d-a5a9-f62febb24e5f",
   "metadata": {},
   "source": [
    "#### 持续检测模型部署进度"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "08969928-6b9e-4d9c-a033-a31f5f77bdfb",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import time\n",
    "\n",
    "resp = sm_client.describe_endpoint(EndpointName=endpoint_name)\n",
    "status = resp[\"EndpointStatus\"]\n",
    "print(\"Status: \" + status)\n",
    "\n",
    "while status == \"Creating\":\n",
    "    time.sleep(60)\n",
    "    resp = sm_client.describe_endpoint(EndpointName=endpoint_name)\n",
    "    status = resp[\"EndpointStatus\"]\n",
    "    print(\"Status: \" + status)\n",
    "\n",
    "print(\"Arn: \" + resp[\"EndpointArn\"])\n",
    "print(\"Status: \" + status)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "d985b427-3959-46f7-9a50-5a2b45e2d513",
   "metadata": {},
   "source": [
    "### 5. 模型测试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e56bfdaa-3469-4784-aa8a-e32177cde3f2",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "%%time\n",
    "import json\n",
    "import boto3\n",
    "\n",
    "smr_client = boto3.client(\"sagemaker-runtime\")\n",
    "\n",
    "parameters = {\n",
    "  \"max_length\": 2048,\n",
    "  \"temperature\": 0.01,\n",
    "  \"num_beams\": 1, # >1可能会报错，\"probability tensor contains either `inf`, `nan` or element < 0\"； 即使remove_invalid_values=True也不能解决\n",
    "  \"do_sample\": False,\n",
    "  \"top_p\": 0.7,\n",
    "  \"logits_processor\" : None,\n",
    "  # \"remove_invalid_values\" : True\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bd86caf8-01e9-41eb-9199-478ba471a02b",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "prompts1 = \"\"\"你是技术领域的智能问答机器人AIBot，请严格根据反括号中的资料提取相关信息，回答用户的各种问题\n",
    "```\n",
    "作业调度工具：Crontab、Airflow、TaskCTL、Moia、Oozie\\n\\n(2).ETL流程的设计\\n\\nETL是将数据从源端经过抽取（extract）、转换（transform）、加载（load）至目的端的一个数据流动过程；对于数据仓库的建设，ETL是其中一个比较\\n\\n重要的环节，通常会根据以下步骤来完成ETL的设计：\\n\\n1.业务场景的分析\\n\\n2.源数据的抽取策略及数据的转换规则\\n\\n3.数据流程的控制\\n\\n4.数据质量校验和ETL作业监控\\n\\n(3).数据模型的设计\\n\\n数据模型是指使用使用实体、属性及其关系对企业运营和逻辑规则进行统一的定义、编码和命名；可以通过对实体和实体之间关系的定义和描述，来表达具体业务之间的关系；数据仓库模型是数据模型中针对特定的数据仓库应用系统的一种特定的数据模型。\\n\\n1.数据仓库数据模型的作用：\\n\\n11_文档资料\\n\\n12_版本上线\\n\\n13_数据资产\\n\\n14_服务维护\\n\\n15_项目\\n\\n16_AI业务\\n\\n17_测试相关\\n\\n18_问题跟踪\\n\\nE编辑\\n\\nW关注\\n\\nS分享\\n\\n\\n\\n工具\\n\\n技术中心\\n\\n数据智能部\\n\\n数据智能部 主页\\n\\n10_参考资料\\n\\n浅谈数据仓库体系\\n\\n转至元数据结尾\\n\\n\\n\\n\\n\\nCreated and last modified by  龙佟佟 on 十一月 15, 2018\\n\\n转至元数据起始\\n\\n数据仓库(Data Warehouse) 是一个面向主题的(Subject-Oriented) 、集成的( Integrate ) 、相对稳定的(Non-Volatile ) 、反映历史变化( Time-Variant) 的数据集合用于支持管理决策。\\n\\n一、数据仓库设计方法论\\n\\n2.ETL任务调度信息、输入输出：由于数据中心的作业都是采用crontab工具调度kettle作业，但任务调度信息不好获取，目前采用解析kettle日志的方式来获取跑批任务的调度信息且信息存储在mysql数据库的t99_sys_etl_job_information表中；后续ETL作业迁移到airflow上时任务调度信息可以直接在airflow的元数据库中获取（可视化展示 Neo4j）。\\n\\n3.表依赖映射关系 ：目前采用人工维护的方式并存储到mysql数据库的t99_sys_etl_table_base_info表中；后续将完善相关信息（表数据大小，数据热度，更新频率等）。\\n\\n4.数据仓库的模型定义：数据中心的数据模型不多，目前也只是采用人工手动维护的方式来管理的；由于模型都是通过SQL的方式进行数据逻辑加工的，后续可以采用\\n\\n作业调度工具：Crontab、Airflow、TaskCTL、Moia、Oozie\\n\\n(2).ETL流程的设计\\n\\nETL是将数据从源端经过抽取（extract）、转换（transform）、加载（load）至目的端的一个数据流动过程；对于数据仓库的建设，ETL是其中一个比较\\n\\n重要的环节，通常会根据以下步骤来完成ETL的设计：\\n\\n1.业务场景的分析\\n\\n2.源数据的抽取策略及数据的转换规则\\n\\n3.数据流程的控制\\n\\n4.数据质量校验和ETL作业监控\\n\\n(3).数据模型的设计\\n\\n数据模型是指使用使用实体、属性及其关系对企业运营和逻辑规则进行统一的定义、编码和命名；可以通过对实体和实体之间关系的定义和描述，来表达具体业务之间的关系；数据仓库模型是数据模型中针对特定的数据仓库应用系统的一种特定的数据模型。\\n\\n1.数据仓库数据模型的作用：\n",
    "```\n",
    "用户: 数据仓库构建有哪些作业调度工具?\n",
    "AIBot:\"\"\"\n",
    "response_model = smr_client.invoke_endpoint(\n",
    "            EndpointName=endpoint_name,\n",
    "            Body=json.dumps(\n",
    "            {\n",
    "                \"inputs\": prompts1,\n",
    "                \"parameters\": parameters,\n",
    "                \"history\" : []\n",
    "            }\n",
    "            ),\n",
    "            ContentType=\"application/json\",\n",
    "        )\n",
    "\n",
    "response_model['Body'].read().decode('utf8')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a6afe140-25a8-4c7f-a797-1f8427348d04",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "prompt=\"\"\"你是技术领域的智能问答机器人AIBot，请严格根据反括号中的资料提取相关信息，回答用户的各种问题\n",
    "```\n",
    "\\n11_文档资料\\n\\n12_版本上线\\n\\n13_数据资产\\n\\n14_服务维护\\n\\n15_项目\\n\\n16_AI业务\\n\\n17_测试相关\\n\\n18_问题跟踪\\n\\nE编辑\\n\\nW关注\\n\\nS分享\\n\\n\\n\\n工具\\n\\n技术中心\\n\\n数据智能部\\n\\n数据智能部 主页\\n\\n10_参考资料\\n\\n浅谈数据仓库体系\\n\\n转至元数据结尾\\n\\n\\n\\n\\n\\nCreated and last modified by  龙佟佟 on 十一月 15, 2018\\n\\n转至元数据起始\\n\\n数据仓库(Data Warehouse) 是一个面向主题的(Subject-Oriented) 、集成的( Integrate ) 、相对稳定的(Non-Volatile ) 、反映历史变化( Time-Variant) 的数据集合用于支持管理决策。\\n\\n一、数据仓库设计方法论\\n\\n作业调度工具：Crontab、Airflow、TaskCTL、Moia、Oozie\\n\\n(2).ETL流程的设计\\n\\nETL是将数据从源端经过抽取（extract）、转换（transform）、加载（load）至目的端的一个数据流动过程；对于数据仓库的建设，ETL是其中一个比较\\n\\n重要的环节，通常会根据以下步骤来完成ETL的设计：\\n\\n1.业务场景的分析\\n\\n2.源数据的抽取策略及数据的转换规则\\n\\n3.数据流程的控制\\n\\n4.数据质量校验和ETL作业监控\\n\\n(3).数据模型的设计\\n\\n数据模型是指使用使用实体、属性及其关系对企业运营和逻辑规则进行统一的定义、编码和命名；可以通过对实体和实体之间关系的定义和描述，来表达具体业务之间的关系；数据仓库模型是数据模型中针对特定的数据仓库应用系统的一种特定的数据模型。\\n\\n1.数据仓库数据模型的作用：\\n\\n每种实现方法都有利弊，通常情况是两种的组合；因为数据仓库在开始构建时一般是用自底向上的方法进行的。当在使用阶段性数据仓库项目模型来构建业务范围架构中的一系列数据集市时，可以一个 接一个地集成不同业务主题领域中的数据集市，从而形成设计良好的业务数据仓库体系结构。\\n\\n二、数据仓库的搭建(传统型数仓)\\n\\n数据仓库是一个过程而不是一个项目，一般情况下可以按照以下四个步骤去搭建一套数据仓库；\\n\\n需求/现状 --> 架构 --> 实现 --> 测试/发布\\n\\n1.需求\\n\\n在搭建数仓前都会有前期的需求调研和分析准备工作，并从各个部门的角度定义计划和需求；其中需求可以大致分为以下三类：\\n\\n(1)业务需求:一般指数据仓库最终用户的信息需求；\\n\\n(2)合规需求:一般是企业上报的数据需求；\\n\\n(3)ETL需求:指数仓建设过程中的数据需求，如数据质量评估（可以更好的了解数据内容、结构和质量）。\\n\\n2.架构\\n\\n(1)当前架构\\n\\n数据仓库通常采用分层体系架构,一般简单的数据仓库会分为三层，贴源层 ,历史存储层,数据模型层：\\n\\n<1>贴源层：把源库中的数据同步到数据仓中来，一般都会与源体系数据保持一致（一方面能够保证我们数据仓库的数据来源是真实存在的，另一方面在出了问题方便进行核对)。\\n\\n对于贴源层抽取源系统的数据，一般有两种方式：1.全量抽取  2.增量抽取\\n\\n<2>历史层：同步贴源层数据并保存历史变化的数据;该层通常都会采用FS-LDM主题域模型来对表进行划分。\\n\\n对于历史层来说，一般有三种存储方式:1.增量切片 2.全量切片 3.增量拉链 （具体表设计需要根据业务，存储和使用来综合评估）\n",
    "```\n",
    "用户: 数据仓库搭建的需求有哪几类\n",
    "AIBot: \"\"\"\n",
    "\n",
    "response_model = smr_client.invoke_endpoint(\n",
    "            EndpointName=endpoint_name,\n",
    "            Body=json.dumps(\n",
    "            {\n",
    "                \"inputs\": prompt,\n",
    "                \"parameters\": parameters,\n",
    "                \"history\" : []\n",
    "            }\n",
    "            ),\n",
    "            ContentType=\"application/json\",\n",
    "        )\n",
    "\n",
    "response_model['Body'].read().decode('utf8')"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "21c8b703-e312-4964-8be9-a754468e07cd",
   "metadata": {},
   "source": [
    "#### 清除模型Endpoint和config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f70d116f-4fb1-4f04-8732-3d6e4fb520de",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!aws sagemaker delete-endpoint --endpoint-name chatglm2-2023-06-27-06-53-41-986-endpoint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "184e4d1d-3d62-43df-9b17-5d64ece928bd",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "!aws sagemaker delete-endpoint-config --endpoint-config-name chatglm2-2023-06-27-06-53-41-986-config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "707e8f09",
   "metadata": {},
   "outputs": [],
   "source": [
    "!aws sagemaker delete-model --model-name chatglm2-2023-06-27-06-53-41-986"
   ]
  }
 ],
 "metadata": {
  "availableInstances": [
   {
    "_defaultOrder": 0,
    "_isFastLaunch": true,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 4,
    "name": "ml.t3.medium",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 1,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 8,
    "name": "ml.t3.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 2,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.t3.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 3,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.t3.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 4,
    "_isFastLaunch": true,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 8,
    "name": "ml.m5.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 5,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.m5.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 6,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.m5.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 7,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.m5.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 8,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.m5.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 9,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.m5.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 10,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.m5.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 11,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 384,
    "name": "ml.m5.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 12,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 8,
    "name": "ml.m5d.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 13,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.m5d.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 14,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.m5d.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 15,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.m5d.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 16,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.m5d.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 17,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.m5d.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 18,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.m5d.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 19,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 384,
    "name": "ml.m5d.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 20,
    "_isFastLaunch": false,
    "category": "General purpose",
    "gpuNum": 0,
    "hideHardwareSpecs": true,
    "memoryGiB": 0,
    "name": "ml.geospatial.interactive",
    "supportedImageNames": [
     "sagemaker-geospatial-v1-0"
    ],
    "vcpuNum": 0
   },
   {
    "_defaultOrder": 21,
    "_isFastLaunch": true,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 4,
    "name": "ml.c5.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 22,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 8,
    "name": "ml.c5.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 23,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.c5.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 24,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.c5.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 25,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 72,
    "name": "ml.c5.9xlarge",
    "vcpuNum": 36
   },
   {
    "_defaultOrder": 26,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 96,
    "name": "ml.c5.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 27,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 144,
    "name": "ml.c5.18xlarge",
    "vcpuNum": 72
   },
   {
    "_defaultOrder": 28,
    "_isFastLaunch": false,
    "category": "Compute optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.c5.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 29,
    "_isFastLaunch": true,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.g4dn.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 30,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.g4dn.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 31,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.g4dn.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 32,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.g4dn.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 33,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 4,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.g4dn.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 34,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.g4dn.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 35,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 61,
    "name": "ml.p3.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 36,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 4,
    "hideHardwareSpecs": false,
    "memoryGiB": 244,
    "name": "ml.p3.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 37,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 488,
    "name": "ml.p3.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 38,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 768,
    "name": "ml.p3dn.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 39,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.r5.large",
    "vcpuNum": 2
   },
   {
    "_defaultOrder": 40,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.r5.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 41,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.r5.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 42,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.r5.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 43,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.r5.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 44,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 384,
    "name": "ml.r5.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 45,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 512,
    "name": "ml.r5.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 46,
    "_isFastLaunch": false,
    "category": "Memory Optimized",
    "gpuNum": 0,
    "hideHardwareSpecs": false,
    "memoryGiB": 768,
    "name": "ml.r5.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 47,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 16,
    "name": "ml.g5.xlarge",
    "vcpuNum": 4
   },
   {
    "_defaultOrder": 48,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 32,
    "name": "ml.g5.2xlarge",
    "vcpuNum": 8
   },
   {
    "_defaultOrder": 49,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 64,
    "name": "ml.g5.4xlarge",
    "vcpuNum": 16
   },
   {
    "_defaultOrder": 50,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 128,
    "name": "ml.g5.8xlarge",
    "vcpuNum": 32
   },
   {
    "_defaultOrder": 51,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 1,
    "hideHardwareSpecs": false,
    "memoryGiB": 256,
    "name": "ml.g5.16xlarge",
    "vcpuNum": 64
   },
   {
    "_defaultOrder": 52,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 4,
    "hideHardwareSpecs": false,
    "memoryGiB": 192,
    "name": "ml.g5.12xlarge",
    "vcpuNum": 48
   },
   {
    "_defaultOrder": 53,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 4,
    "hideHardwareSpecs": false,
    "memoryGiB": 384,
    "name": "ml.g5.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 54,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 768,
    "name": "ml.g5.48xlarge",
    "vcpuNum": 192
   },
   {
    "_defaultOrder": 55,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 1152,
    "name": "ml.p4d.24xlarge",
    "vcpuNum": 96
   },
   {
    "_defaultOrder": 56,
    "_isFastLaunch": false,
    "category": "Accelerated computing",
    "gpuNum": 8,
    "hideHardwareSpecs": false,
    "memoryGiB": 1152,
    "name": "ml.p4de.24xlarge",
    "vcpuNum": 96
   }
  ],
  "instance_type": "ml.m5.large",
  "kernelspec": {
   "display_name": "Python 3 (Data Science 3.0)",
   "language": "python",
   "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-west-2:236514542706:image/sagemaker-data-science-310-v1"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
